This markdown contains data from the Cech Annotation project for Ashwagandha. Samples were preprocessed in Progenesis by Luke Marney and de novo annotation was conducted using the SIRIUS suite of software by Yanni Bouranis. All data presented is DIA.

All Ashwagandha Features

All Ashwagandha samples were run through Progenesis first. No filter was used to narrow down features:

Data Overview

library(tidyverse)
library(DT)
library(ggfortify)

# PCA of the unfiltered Progenesis export.
# skip = 2 skips the first two lines of the CSV before the header row is read.
# Columns 11:16 are picked by position (six columns) and 'Compound' becomes
# the row names, giving a features x samples table.
ProgFile <- read_csv('~/Desktop/CechData/220413_QTOF_POS_DIA_RAW.csv', skip = 2) %>%
  dplyr::select(Compound, 11:16) %>%
  column_to_rownames('Compound')

# prcomp() treats rows as observations, so transpose to samples x features.
ProgT <- as.data.frame(t(ProgFile))
# TRUE/FALSE spelled out: T and F are ordinary variables that can be reassigned.
PCA <- prcomp(ProgT, scale. = TRUE, center = TRUE)

# One label per selected column, in column order.
# NOTE(review): the three sample names appear twice -- presumably the six
# columns are two blocks of the same three injections; confirm against the
# CSV header.
filenames <- c('Qtof_P_E3_WS03A', 'Qtof_P_E3_WS03B', 'Qtof_P_E3_WS03C', 'Qtof_P_E3_WS03A', 'Qtof_P_E3_WS03B', 'Qtof_P_E3_WS03C')

PCi <- data.frame(PCA$x, 
           filename = filenames)

# Percent variance explained by each component, taken from the fitted model so
# the axis labels cannot drift out of sync with the data.
PCAVarPct <- round(100 * PCA$sdev^2 / sum(PCA$sdev^2), 2)

ggplot(PCi, aes(x = PC1, y = PC2, color = filename)) +
  geom_point(size = 3) +
  xlab(paste0('PC1 ', PCAVarPct[1], '%')) +
  ylab(paste0('PC2 ', PCAVarPct[2], '%'))

Annotations

# Read the Canopus output for all features, replace spaces in the column
# names with underscores, and drop the 'all_classifications' column.
CanopusAll <- read_csv('~/Desktop/CechData/CanopusDIAOutput.csv')
CanopusAll <- rename_with(CanopusAll, function(nm) gsub(' ', '_', nm))
CanopusAll <- dplyr::select(CanopusAll, -all_classifications)

# Interactive table: per-column filters on top, 12 rows per page,
# horizontal scrolling, and the Buttons extension enabled.
datatable(
  CanopusAll,
  filter = 'top',
  extensions = 'Buttons',
  options = list(
    pageLength = 12,
    dom = 'Bfrtip',
    scrollX = TRUE
  )
)

Distribution of Annotations:

To simplify visual inspection of the classes, annotations with fewer than three metabolites were binned into the “other” category.

With Peptides/Amino Acids

# Number of features per level_5 class. Rows with a missing value are dropped,
# and classes with fewer than 3 features get the shared label 'other'.
DIAF <- CanopusAll %>%
  count(level_5, name = 'n') %>%
  drop_na() %>%
  mutate(bincat = ifelse(n < 3, 'other', level_5))

# Bar chart of feature counts per binned class, x labels rotated 90 degrees.
Dall <- ggplot(data = DIAF, mapping = aes(x = bincat, y = n)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))

# Render the chart as an interactive plotly widget.
plotly::ggplotly(p = Dall, width = 900, height = 600)

Without Peptides/Amino Acids

# Same per-class counts with the two amino-acid/peptide classes removed.
DIAFfilt <- dplyr::filter(
  DIAF,
  !(bincat %in% c('Amino acids and derivatives', 'Peptides'))
)

# Bar chart of the remaining classes, x labels rotated 90 degrees.
DF <- ggplot(data = DIAFfilt, mapping = aes(x = bincat, y = n)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))

# Render the chart as an interactive plotly widget.
plotly::ggplotly(p = DF, width = 900, height = 600)

Filtered Ashwagandha Features

The blanks looked noisy so we built a new Progenesis run with both blanks and the extracts. Features which were more abundant in the blanks were filtered out. This significantly decreased the size of our dataset from ~8000 features to ~200 features.

Data Overview

# PCA of the blank-filtered Progenesis export (blanks and extracts together).
# skip = 2 skips the first two lines of the CSV before the header row is read.
# Columns 16:33 are picked by position (18 columns) and 'Compound' becomes the
# row names, giving a features x samples table.
ProgFileFilt <- read_csv('~/Desktop/CechData/rerun/220420_FINAL_QTOF_POS_DIA.csv', skip = 2) %>%
  dplyr::select(Compound, 16:33) %>%
  column_to_rownames('Compound')

# prcomp() treats rows as observations, so transpose to samples x features.
ProgTF <- as.data.frame(t(ProgFileFilt))
# TRUE/FALSE spelled out: T and F are ordinary variables that can be reassigned.
PCAF <- prcomp(ProgTF, scale. = TRUE, center = TRUE)

filenamesFilt <- c('Qtof_P_E3_EXBLANKA', 'Qtof_P_E3_EXBLANKB', 'Qtof_P_E3_EXBLANKC', 'Qtof_P_E3_Wasteblank4', 'Qtof_P_E3_Wasteblank5', 'Qtof_P_E3_Wasteblank6', 'Qtof_P_E3_WS03A', 'Qtof_P_E3_WS03B', 'Qtof_P_E3_WS03C')

# The score matrix has one row per selected column, but only nine names are
# listed, so data.frame() used to recycle them silently. The repetition is now
# explicit and still fails if the row count is not a whole multiple.
# NOTE(review): this assumes the selected columns are consecutive blocks of the
# same nine injections in the same order -- confirm against the CSV header.
stopifnot(nrow(PCAF$x) %% length(filenamesFilt) == 0)
PCiF <- data.frame(PCAF$x, 
           filename = rep(filenamesFilt,
                          times = nrow(PCAF$x) / length(filenamesFilt)))

# Percent variance explained by each component, taken from the fitted model so
# the axis labels cannot drift out of sync with the data.
PCAFVarPct <- round(100 * PCAF$sdev^2 / sum(PCAF$sdev^2), 2)

ggplot(PCiF, aes(x = PC1, y = PC2, color = filename)) +
  geom_point(size = 3) +
  xlab(paste0('PC1 ', PCAFVarPct[1], '%')) +
  ylab(paste0('PC2 ', PCAFVarPct[2], '%'))

Although our data are already skewed toward what we want to see, since they have been filtered down to only those features that are most abundant in the Ashwagandha extracts, we see clear separation of the extracts from the blanks.

Annotations

# Read the Canopus output for the blank-filtered features, replace spaces in
# the column names with underscores, and drop the 'all_classifications' column.
CanopusFilt <- read_csv('~/Desktop/CechData/rerun/FilteredDIA.csv') %>%
  rename_with(~gsub(' ', '_', .x)) %>%
  dplyr::select(-all_classifications)

# Interactive table: per-column filters on top, 12 rows per page, horizontal
# scrolling. The 'B' in dom places the Buttons element, which only exists when
# the Buttons extension is loaded, so it is enabled here as in the table for
# the unfiltered features.
datatable(CanopusFilt,
           filter = 'top',
           extensions = 'Buttons',
           options = list(pageLength = 12,
                          dom = 'Bfrtip',
                          scrollX = TRUE))  

Distribution of Annotations:

# Number of blank-filtered features per level_5 class. Rows with a missing
# value are dropped; 'bincat' relabels classes with fewer than 3 features
# as 'other'.
DIAFilterd <- CanopusFilt %>%
  count(level_5, name = 'n') %>%
  drop_na() %>%
  mutate(bincat = ifelse(n < 3, 'other', level_5))

# Bar chart of feature counts per class, x labels rotated 90 degrees.
# NOTE(review): this plot maps the raw level_5 class on x, so 'bincat' is
# computed but never drawn here, unlike the other distribution plots in this
# document -- confirm whether the unbinned view is intentional.
Dfl <- ggplot(data = DIAFilterd, mapping = aes(x = level_5, y = n)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))

# Render the chart as an interactive plotly widget.
plotly::ggplotly(p = Dfl, width = 900, height = 600)

By filtering our dataset it looks like we lost a lot of metabolites that were originally annotated as withanolides. It will be important to analyze our blanks next to see what we are annotating in them.

Blanks

Data Overview

# PCA of the blanks-only Progenesis export.
# skip = 2 skips the first two lines of the CSV before the header row is read.
# Columns 16:27 are picked by position (12 columns) and 'Compound' becomes the
# row names, giving a features x samples table.
ProgBlanks <- read_csv('~/Desktop/CechData/rerun/blanks/BlanksRawPos.csv', skip = 2) %>%
  dplyr::select(Compound, 16:27) %>%
  column_to_rownames('Compound')

# prcomp() treats rows as observations, so transpose to samples x features.
ProgB <- as.data.frame(t(ProgBlanks))
# TRUE/FALSE spelled out: T and F are ordinary variables that can be reassigned.
PCAB <- prcomp(ProgB, scale. = TRUE, center = TRUE)

filenamesB <- c('Qtof_P_E3_EXBLANKA', 'Qtof_P_E3_EXBLANKB', 'Qtof_P_E3_EXBLANKC', 'Qtof_P_E3_Wasteblank4', 'Qtof_P_E3_Wasteblank5', 'Qtof_P_E3_Wasteblank6')

# The score matrix has one row per selected column, but only six names are
# listed, so data.frame() used to recycle them silently. The repetition is now
# explicit and still fails if the row count is not a whole multiple.
# NOTE(review): this assumes the selected columns are consecutive blocks of the
# same six injections in the same order -- confirm against the CSV header.
stopifnot(nrow(PCAB$x) %% length(filenamesB) == 0)
PCiB <- data.frame(PCAB$x, 
           filename = rep(filenamesB,
                          times = nrow(PCAB$x) / length(filenamesB)))

# Percent variance explained by each component, taken from the fitted model so
# the axis labels cannot drift out of sync with the data.
PCABVarPct <- round(100 * PCAB$sdev^2 / sum(PCAB$sdev^2), 2)

ggplot(PCiB, aes(x = PC1, y = PC2, color = filename)) +
  geom_point(size = 3) +
  xlab(paste0('PC1 ', PCABVarPct[1], '%')) +
  ylab(paste0('PC2 ', PCABVarPct[2], '%'))

Annotations

 # Location of the Canopus output for the blanks. It is defined once and reused
# below, so the path is no longer written out twice.
file <- '~/Desktop/CechData/rerun/blanks/blanksCanopusOutput.csv'

# Read the Canopus output, replace spaces in the column names with
# underscores, and drop the 'all_classifications' column.
BlanksCanopus <- read_csv(file) %>%
  rename_with(~gsub(' ', '_', .x)) %>%
  dplyr::select(-all_classifications)

# Interactive table: per-column filters on top, 12 rows per page, horizontal
# scrolling. The 'B' in dom places the Buttons element, which only exists when
# the Buttons extension is loaded, so it is enabled here as in the table for
# the unfiltered features.
datatable(BlanksCanopus,
           filter = 'top',
           extensions = 'Buttons',
           options = list(pageLength = 12,
                          dom = 'Bfrtip',
                          scrollX = TRUE))  

Distribution of Annotations:

# Number of blank features per level_5 class. Rows with a missing value are
# dropped, and classes with fewer than 3 features get the shared label 'other'.
BlanksGrouped <- BlanksCanopus %>%
  count(level_5, name = 'n') %>%
  drop_na() %>%
  mutate(bincat = ifelse(n < 3, 'other', level_5))

# Bar chart of feature counts per binned class, x labels rotated 90 degrees.
Bdi <- ggplot(data = BlanksGrouped, mapping = aes(x = bincat, y = n)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))

# Render the chart as an interactive plotly widget.
plotly::ggplotly(p = Bdi, width = 900, height = 600)